/*
THIS FILE BUILDS IDR APPLICATION AND ENROLLMENT DATA SETS

AUTHOR: LT 
CREATED: 6/23/2023
LAST UPDATED: 8/23/2023 
*/

* SESSION SETUP: EMPTY MEMORY, THEN SET STATA RESOURCE OPTIONS
clear
set more off
set niceness 0
set segmentsize 3g

* PROJECT DIRECTORIES (LEFT BLANK HERE - FILL IN BEFORE RUNNING)
*   original : WORKING DIRECTORY FOR THE ZIPPED CSV EXTRACTS (READ BY RELATIVE PATH)
*   working  : WHERE INTERMEDIATE AND FINAL .DTA FILES ARE SAVED
*   output   : WHERE GRAPHS ARE SAVED
global original ""
global working  ""
global output   ""

* ALL UNZIPFILE/INSHEET CALLS BELOW USE RELATIVE FILE NAMES
cd "$original"



/*******************************************************************************/
*** CPI-U: ONE ROW PER YEAR-MONTH
insheet using "${working}\cpiu.csv", names clear

* GIVE THE TWELVE MONTH COLUMNS A COMMON STUB: jan -> cpi1, ..., dec -> cpi12
local mnum = 0
foreach mname in jan feb mar apr may jun jul aug sep oct nov dec {
	local ++mnum
	rename `mname' cpi`mnum'
}

* LONG FORMAT, WITH month (1-12) TAKEN FROM THE STUB SUFFIX
reshape long cpi, i(year) j(month)

save "${working}\cpi", replace

/*******************************************************************************/
*** DATA SET WITH INFLATION ADJUSTED RUNNING VARIABLE
** BASED ON MONTH OF INITIAL APPLICATION
* OUTPUT: ${working}\treatment WITH v5, v1, v3 (REAL, WINSORIZED), d, dv3, donut,
*         cpi (DEFLATOR), v40 (REAL), v4_nom0 (NOMINAL COPY) AND Y-PREFIXED v1 DUMMIES

unzipfile     "idrrd_v42_0.csv.zip" , replace
insheet using "idrrd_v42_0.csv", names clear 
erase         "idrrd_v42_0.csv"
	
* CPI FILE IS KEYED ON year/month; v1 IS TEMPORARILY RENAMED TO MATCH THAT KEY
rename v1 year 
gen month = real(substr(v2,6,2))
	
merge m:1 year month using "${working}\cpi", keep(1 3) nogen
	
* JAN 2022 $: AFTER THIS LINE cpi HOLDS THE MULTIPLIER 281.148/CPI, NOT THE INDEX
loc num = 281.148
	
replace cpi = `num'/cpi
	
* donut AND d ARE BUILT FROM v3 BEFORE THE INFLATION ADJUSTMENT BELOW
* (donut MUST ALREADY EXIST IN THE EXTRACT, SINCE IT IS REPLACED RATHER THAN GENERATED)
replace donut = v3>0 & v3<=200
gen d = v3<=0

replace v3 = round(v3*cpi)

rename year v1

* WINSORIZE RUNNING VARIABLE AT 1ST & 99TH PERCENTILES (W/IN APP YEAR)
* v3<. KEEPS MISSING VALUES FROM BEING TREATED AS ABOVE THE 99TH PERCENTILE
forv y = 2015/2018 {
	sum v3 if v1 == `y', det
	replace v3 = r(p99) if v3>r(p99) & v3<. & v1 == `y'
	replace v3 = r(p1) if v3<r(p1) & v1 == `y'
}


* INTERACTION OF THE BELOW-CUTOFF INDICATOR WITH THE RUNNING VARIABLE
gen dv3 = d*v3
	
* KEEP A NOMINAL COPY, THEN INFLATION ADJUST v40
* FIX: THE COPY USED TO READ v4 AFTER v4 HAD BEEN RENAMED TO v40, WHICH ONLY
* WORKED THROUGH VARIABLE ABBREVIATION; IT NOW REFERENCES v40 EXPLICITLY
rename v4 v40
gen v4_nom0 = v40
replace v40 = round(v40*cpi)
	
* month IS NOT NEEDED PAST THE CPI MERGE, SO IT IS NOT KEPT
keep v5 donut v3 d dv3 v1 cpi v40 v4_nom0 

* DUMMIES FOR v1 WITH PREFIX Y
xi, prefix(Y) i.v1

compress

save "${working}\treatment", replace



**#****************************************************************************
*** PROCESS APPLICATION DATA 
* BUILDS ${working}\app: ONE ROW PER BORROWER (v5) AND MONTH SINCE FIRST
* APPLICATION (v42) FOR BORROWERS WITHIN THE 6500 BANDWIDTH AT FIRST APPLICATION
*
* FIXES RELATIVE TO THE PREVIOUS VERSION
*  - THE INDICATOR GENERATED HERE IS app, BUT THE KEEP/RESHAPE/RECODE STEPS
*    REFERRED TO v12, WHICH IS NEVER CREATED AND CANNOT SURVIVE THE FIRST KEEP.
*    THEY NOW USE app AND v11_app, THE NAMES THE BINSCATTER BELOW EXPECTS
*  - ANNUAL FILES WERE DECLARED AS TEMPFILES BUT SAVED AS PERMANENT FILES IN THE
*    RAW DIRECTORY AND ERASED BY HAND; THEY ARE NOW REAL TEMPFILES
*  - A5 WAS LISTED TWICE IN THE APPLICATION-TYPE LIST
** LOOP OVER APP YEARS
forv i = 2015/2018 {
		
	unzipfile "idrrd_apps_fa`i'.csv.zip" , replace
	insheet using "idrrd_apps_fa`i'.csv", names clear 
	erase "idrrd_apps_fa`i'.csv"
	
* NUMBER OF MONTHS BETWEEN APPLICATION AND INITIAL APPLICATION
	gen v42 = ym(real(substr(v2 ,1,4)), real(substr(v2 ,6,2))) - ///
		ym(real(substr(first_v2 ,1,4)), real(substr(first_v2  ,6,2)))

* ADJUST FOR INFLATION
	gen year = real(substr(v2,1,4))
	gen month = real(substr(v2,6,2))
	
	merge m:1 year month using "${working}\cpi", keep(1 3) nogen
	
* JAN 2022 $	
	loc num = 281.148
	
	replace cpi = `num'/cpi
	
	replace v3 = round(v3*cpi)
	
* KEEP $6500 BW RD SAMPLE
* A BORROWER IS IN SAMPLE IF THE MONTH-0 APPLICATION HAS ABS(v3) BELOW 6500
	gen ind = abs(v3)<6500 & v42 ==0
	bysort v5: egen samp = max(ind)
	keep if samp==1
	
* FOR NOW LIMIT TO MONTHS ELAPSED THAT WE HAVE OTHER OUTCOMES FOR
	keep if v42 <47
	
* OUTCOME MEASURES
	keep v5 v7 v8 v6 v9 v10 v3 v42
	
* app = 1 ON EVERY APPLICATION ROW; v11 = 1 WHEN v7 IS ONE OF THE LISTED CODES
	gen app = 1
	gen v11 = 0
	foreach r in A1 A2 A3 A4 A5 A6 A7 D1 NEW RECALCULATE RECERTIFY CHANGEPLAN {
		recode v11 (0 = 1) if v7 =="`r'"
	}

* DOUBLE RESHAPE AND FILL IN 12 MONTHS AFTER SUCCESSFUL RECERTS TO DISTINGUISH BETWEEN NPFH STAYING ON IDR AND NONRECERTS
* WIDE-THEN-LONG ADDS A ROW FOR EVERY BORROWER AT EVERY v42 VALUE IN THE FILE
	keep v11 v8 v6 v9 v10 v3 app v5 v42
	
	reshape wide v11 v8 v6 v9 v10 v3 app, i(v5) j(v42)
	reshape long v11 v8 v6 v9 v10 v3 app, i(v5) j(v42)
	
* ROWS ADDED BY THE RESHAPE ARE MONTHS WITHOUT AN APPLICATION
	recode app (. = 0)
	recode v11 (. = 0)
	gen v11_app = v11*app
	
* v11_idr = 1 IN THE MONTH OF A v11 APPLICATION AND THE 12 MONTHS AFTER IT
	gen v11_idr= 0
	forv m = 0/12 {
		bysort v5 (v42): replace v11_idr =1  if v11_app[_n-`m'] ==1 & v5 == v5[_n-`m'] & v42 == v42[_n-`m']+`m'
	}
	
	keep v5 v42 app v11_app v11_idr v6 v3 v10
	
	tempfile app`i'
	save `app`i'', replace

* CLOSE LOOP OVER APP YEARS	
}

* APPEND YEARS (THE 2018 FILE IS STILL IN MEMORY AFTER THE LOOP)
forv i = 2015/2017 {
	append using `app`i''
}

* SAVE FILE WITH ALL 4 APP YEARS FOR $6500 BW RD SAMPLE
compress
save "${working}\app", replace

* ANNUAL FILES ARE TEMPFILES AND ARE REMOVED BY STATA; NO MANUAL ERASE NEEDED

** TIME SERIES OF APPLICATIONS
* BINNED MEANS OF app AND v11_app BY MONTH SINCE FIRST APPLICATION (MONTHS AFTER 0 ONLY)
binscatter app v11_app v42 if v42 >0, ///
	n(44) line(none) ///
	m(Oh O ) mc(midblue dknavy ) ///
	xti(Months since first application) ///
	xlab(1(5)46, labs(small)) ///
	yti(Percent of borrowers) ///
	ylab(, gmin gmax labs(small)) ///
	legend(lab(1 Any application in month) lab(2 v11 application in month) size(small))

* KEEP THE GRAPH ON DISK
graph save "${output}\recert_by_v42.gph", replace


* SAVE ZIPPED VERSION 
* FIX: THE PREVIOUS CALL NAMED THE MEMBER app (NO SUCH FILE) AND WROTE THE
* ARCHIVE TO app.dta, THE DATA FILE ITSELF, BEFORE ERASING IT. THE DATA FILE
* IS NOW ZIPPED INTO app.zip AND ONLY THEN IS THE UNZIPPED COPY REMOVED
clear
cd "${working}"
zipfile app.dta, saving(app.zip, replace)
erase app.dta

* RETURN TO THE RAW-DATA DIRECTORY FOR THE RELATIVE-PATH UNZIPFILE CALLS BELOW
* FIX: THIS USED THE GLOBAL raw, WHICH IS NEVER DEFINED IN THIS FILE
clear 
cd "${original}"

**#*******************************************************************************
*** BASELINE CHARS
** INITIAL APPLICATION
* STACK THE FIRST APPLICATION OF EACH BORROWER FROM THE FOUR ANNUAL EXTRACTS

forvalues yr = 2015/2018 {

	unzipfile     "idrrd_apps_fa`yr'.csv.zip" , replace
	insheet using "idrrd_apps_fa`yr'.csv", names clear
	erase         "idrrd_apps_fa`yr'.csv"

* FIRST APPLICATION = ROW WHOSE DATE EQUALS THE FIRST APPLICATION DATE
	keep if v2 == first_v2

	tempfile app`yr'
	save `app`yr'', replace
}

* COMBINE THE FOUR YEARS
use `app2015', clear
foreach yr of numlist 2016/2018 {
	append using `app`yr''
}

* MARK THE APPLICATION-FILE VERSIONS OF v6 AND v3 AS NOMINAL
rename (v6 v3) (v6_nominal v3_nominal)

tempfile baseline_apps
save `baseline_apps', replace

* BASELINE CHARACTERISTICS EXTRACT
unzipfile "idrrd_baseline_chars.csv.zip" , replace
insheet using "idrrd_baseline_chars.csv", names clear
erase "idrrd_baseline_chars.csv"

* d AND v3 ARE TAKEN FROM THE TREATMENT FILE INSTEAD
drop d v3

* INFLATION ADJ RUNNING VAR (MATCHED BORROWERS ONLY)
merge 1:1 v5 using "${working}\treatment" , keep(match) nogenerate

* INITIAL APPLICATION (ALL BORROWERS KEPT, MATCHED OR NOT)
merge 1:1 v5 using `baseline_apps', keep(master match) nogenerate

* PUT RUNNING VAR IN $1K TERMS
foreach rv of varlist v3 dv3 {
	replace `rv' = `rv'/1000
}

* ACADEMIC LEVEL INDICATORS (ONE DUMMY PER LEVEL, NONE OMITTED)
xi, noomit prefix(A) i.v13

* Y/N FLAGS TO 0/1
gen drt = v14 == "Y"
gen v26 = v15 == "Y"
gen v33 = v16 == "Y"

* INCOME SOURCE INDICATORS
gen inc_src_paper  = v17 == "Paper Tax Form"
gen inc_src_drt    = v17 == "SA.gov - DRT"
gen inc_src_adoi   = v17 == "Self Attested - ADOI Provided"
gen inc_src_noadoi = v17 == "Self Attested - No ADOI (0 Income)"

* AUTO DEBT 6 MONTHS BEFORE APP
gen any_ad_pre6 = v18 ==1

* SERVICER IN PRE PERIOD (USE MONTH PRIOR TO INITIAL APP, IF MISSING 2 MONTHS PRIOR, ETC.)
* EACH PASS MERGES ONE PRE-PERIOD MONTH WITH THE UPDATE OPTION, SO v19_pre KEEPS
* THE VALUE FROM THE MOST RECENT MONTH AND IS ONLY FILLED FROM AN EARLIER MONTH
* WHEN STILL MISSING. THE RAW MONTHLY VALUE IS KEPT AS servicer_v42_pre1-3
*
* FIX: WITH UPDATE, MATCHED ROWS THAT GET FILLED HAVE _merge 4 AND MATCHED ROWS
* THAT DISAGREE HAVE _merge 5. THE OLD keep(1 3) DROPPED THOSE BORROWERS FROM
* THE BASELINE FILE; ALL MASTER ROWS ARE NOW KEPT
forv i = 1/3 {

* FILE SUFFIX IS THE NEGATIVE MONTH (-1, -2, -3)
loc j = -`i'
preserve
	
	unzipfile "idrrd_evar1_v42`j'.csv.zip" , replace
	insheet using "idrrd_evar1_v42`j'.csv", names clear 
	erase "idrrd_evar1_v42`j'.csv"

	keep v5 v19
	
	gen v19_pre = v19
	rename v19 servicer_v42_pre`i'
	
* THESE CODES ARE TREATED AS MISSING SO AN EARLIER MONTH CAN FILL THEM
	recode v19_pre (0 1 34 38 47 50 555 = .)
	
	tempfile temp
	save `temp', replace

restore

merge 1:1 v5 using `temp', update keep(1 3 4 5) nogen

}

* IF STILL MISSING AND ANY PRE-PERIOD MONTH SHOWS CODE 43 OR 555, ASSIGN 555
recode v19_pre (. = 555) if inlist(servicer_v42_pre1,43,555) | inlist(servicer_v42_pre2,43,555) |  inlist(servicer_v42_pre3,43,555) 

* FAFSA CHARS
* BUILDS ONE ROW PER BORROWER FROM THE FAFSA EXTRACT (EARLIEST AWARD YEAR
* THROUGH 2018) AND MERGES IT ONTO THE BASELINE DATA IN MEMORY
preserve

	unzipfile "idrrd_fafsa.csv.zip" , replace
	insheet using "idrrd_fafsa.csv", names clear 
	erase "idrrd_fafsa.csv"

	drop if award_yr >2018
	
* female IS TAKEN OVER ALL OF A BORROWERS RECORDS; OTHER FIELDS COME FROM THE FIRST
	bysort v5 (award_yr ): gen first = _n==1
	gen dum = v21 == "2" if v21 ~=""
	bysort v5: egen female = max(dum)
	drop dum
	gen miss_gender = female ==.
	recode female (. =0)
	keep if first == 1
* 9999999 IS RECODED TO MISSING
	recode v22 (9999999 = .)
	gen miss_v22 = v22==.
	
	* ADJUST FAMILY INCOME FOR INFLATION (JAN 2022 $)	
	* JANUARY CPI OF THE AWARD YEAR IS USED AS THE DEFLATOR
	rename award_yr year
	gen month = 1
	merge m:1 year month using "${working}\cpi", keep(1 3) nogen
	loc num = 281.148
	replace cpi = `num'/cpi
	replace v22 = round(v22*cpi)
	
	rename year year_first_fafsa
	
	keep female miss_gender v22 miss_v22 v23 v5 year_first_fafsa
	tempfile temp
	save `temp', replace

restore

* FIX: THE MERGE USES UPDATE, SO A MATCHED BORROWER CAN HAVE _merge 4 OR 5 IF
* ANY FAFSA VARIABLE ALREADY EXISTS IN THE BASELINE DATA. THE OLD FILTER KEPT
* ONLY 1 AND 3 AND WOULD HAVE DROPPED THOSE BORROWERS. ONLY USING-ONLY ROWS
* (_merge 2) ARE DROPPED NOW
merge 1:1 v5 using `temp', update 
keep if inlist(_m,1,3,4,5)
gen miss_fafsa = _m==1
drop _m

* AGE AT FIRST APPLICATION, USING THE FIRST OF THE MONTH FOR BOTH DATES
gen age_at_app = age(mdy(real(substr(v23,6,2)),1,real(substr(v23,1,4))), mdy(real(substr(v2,6,2)),1,real(substr(v2,1,4))))

* FAMILY INCOME FROM A FAFSA FILED AFTER THE APPLICATION YEAR IS SET TO MISSING
replace v22 = . if year_first_fafsa>v1
replace miss_v22 = 1 if v22 ==.
* BORROWERS WITH NO FAFSA RECORD HAVE miss_gender MISSING AFTER THE MERGE
recode miss_gender (. = 1)


* FINAL BASELINE FILE: KEEP THE ANALYSIS VARIABLES, INCLUDING THE Y-PREFIXED
* AND A-PREFIXED DUMMIES AND THE inc_src INDICATORS, THEN SAVE ONE ROW PER BORROWER
* NOTE(review): SEVERAL NAMES BELOW (v24, v25*, v27-v30, bd_v30, v32, v34-v37)
* ARE NOT CREATED IN THIS FILE - PRESUMABLY THEY COME FROM THE EXTRACTS; CONFIRM
keep v5 v24 v25* v1 v3 donut v40 cpi d dv3 Y* inc_src* ///
	v7 drt v26 v8 v27 v28 v29 ///
	v30 bd_v30 v32 v33 v34 v35 ///
	v10 v36 v3_nominal v9 A* /*years_since_mature*/ ///  
	v19_pre v37 age_at_app miss_fafsa miss_gender female v22 miss_v22 ///
	year_first_fafsa
	
compress

save "${working}\baseline", replace	
	
**#*******************************************************************************
*** PROCESS ENROLLMENT DATA 
** MONTH AND YEAR OF FIRST APPLICATION FOR $6500 BW RD SAMPLE
* WRITES samp.dta (v5, v2, year, month) TO THE CURRENT DIRECTORY
unzipfile     "idrrd_moel_0.csv.zip" , replace
insheet using "idrrd_moel_0.csv", names clear
erase         "idrrd_moel_0.csv"

keep v5 v2 v3

* ADJUST FOR INFLATION: CPI IS MATCHED ON THE YEAR AND MONTH PARSED FROM v2
gen year  = real(substr(v2,1,4))
gen month = real(substr(v2,6,2))

merge m:1 year month using "${working}\cpi", keep(master match) nogenerate

* JAN 2022 $
local jan2022 = 281.148
replace cpi = `jan2022'/cpi
replace v3  = round(v3*cpi)

* KEEP $6500 BW RD SAMPLE (MISSING v3 IS DROPPED AS WELL)
drop if !(abs(v3)<6500)

drop v3 cpi

save samp, replace

** LOOP OVER ANNUAL FILES TO CREATE MONTHLY "LONG" FILE
* FOR EACH ANNUAL ENROLLMENT EXTRACT: RESTRICT TO THE RD SAMPLE, SPLIT OFF
* GRADUATION RECORDS INTO grad<year>.dta, EXPAND EACH ENROLLMENT SPELL TO ONE
* ROW PER MONTH, AND COLLAPSE TO ONE ROW PER BORROWER-MONTH IN A TEMPFILE
*
* FIX: THE SPELL END MONTH WAS GENERATED AS v41_end BUT EVERY LATER STATEMENT
* USES v42_end, SO THE LOOP STOPPED AT THE FIRST REFERENCE. IT IS NOW v42_end
* THROUGHOUT, AS IN THE PARALLEL CODE FURTHER DOWN THE FILE

forv i = 2015/2018 {
	
	unzipfile     "idrrd_nsldsenrl_fa`i'.csv.zip" , replace
	insheet using "idrrd_nsldsenrl_fa`i'.csv", names clear 
	erase         "idrrd_nsldsenrl_fa`i'.csv"
	
* RESTRICT TO SAMPLE BORROWERS AND BRING IN THE FIRST APPLICATION DATE v2
* NOTE(review): WITH UPDATE, ROWS WITH _merge 4 OR 5 ARE DROPPED BY THE NEXT
* LINE IF THE EXTRACT SHARES A NON-KEY VARIABLE WITH samp - CONFIRM INTENDED
	merge m:1 v5 using samp, update
	keep if _m == 3
	drop _m
	
* ONLY KEEP RECORDS THAT CORRESPOND TO ENROLLMENT SPELLS
	foreach t in A D M W X Z {
		drop if v38 == "`t'"
	}

	gen grad = v38 == "G"
	
* ENROLLMENT SPELL: NUMBERED BY START DATE, SEPARATELY FOR GRADUATION RECORDS
	bysort v5 grad (v39): gen spell = _n	
	
* REPLACE 9999 END DATES WITH MAY 31, 2023
	replace v40 = "2023-05-31" if v40 == "9999-12-31"	 & v38 ~="G"
	
* SPELL START AND END IN MONTHS RELATIVE TO THE FIRST APPLICATION
	gen v42_st = ym(real(substr(v39 ,1,4)), real(substr(v39 ,6,2))) - ///
		ym(real(substr(v2,1,4)), real(substr(v2,6,2)))

	gen v42_end = ym(real(substr(v40,1,4)), real(substr(v40,6,2))) - ///
		ym(real(substr(v2,1,4)), real(substr(v2,6,2)))

	replace v42_end = . if v38 == "G"

* PUT GRADUATION RECORDS IN A DIFFERENT FILE
* SAVED AS A PERMANENT FILE IN THE CURRENT DIRECTORY; THE GRADUATION STEP
* AFTER THIS LOOP READS AND THEN ERASES THESE FILES
preserve

	keep if grad == 1
	rename v42_st v42
	keep v5 v42 grad v43
	
* FOR NOW, DROP GRADUATION OBS FROM PRE-APPLICATION PERIOD (MAY WANT TO COME BACK TO ADD TO DEMS LATER)	
	drop if v42<0 
	
	save grad`i', replace

restore

	drop if grad == 1
	drop grad v1 

* FOR NOW, DROP ENROLLMENT SPELLS THAT ENDED MORE THAN 12 MONTHS BEFORE FIRST APPLICATION
	drop if v42_end<-13

* EARLIER STARTS ARE CENSORED AT MONTH -13; MONTH -13 IS DROPPED AFTER THE COLLAPSE
	replace v42_st = -13 if v42_st<-12

* SPELLS WITH THE 9999 END DATE AND A START BEFORE THE FIRST APPLICATION
* NOTE(review): v42_st CANNOT BE BELOW -13 AFTER THE CENSORING ABOVE, SO THIS
* DROP NEVER REMOVES ANYTHING AS WRITTEN - CONFIRM THE INTENDED CONDITION
	drop if v40 == "2023-05-31" & v42_st < -13

	drop v39 v40 v2

* ONE PANEL UNIT PER BORROWER-SPELL; TSFILL ADDS A ROW FOR EVERY MONTH
	egen group = group(v5 spell)
	rename v42_st v42

	tsset group v42

	tsfill, full

* CARRY SPELL ATTRIBUTES FORWARD FROM THE START MONTH INTO THE FILLED MONTHS
	foreach v of var v5 v43 spell v42_end {
		bysort group (v42): replace `v' = `v'[_n-1] if `v' == . & `v'[_n-1] ~=. & group == group[_n-1] & v42 == v42[_n-1] + 1
	}

	bysort group (v42): replace v38 = v38[_n-1] if v38== "" & v38[_n-1] ~="" & group == group[_n-1] & v42 == v42[_n-1] + 1
* BLANK OUT MONTHS AFTER THE SPELL HAS ENDED
	replace v38 = "" if v42>v42_end
	replace v43 = . if v42>v42_end

* INDICATORS FOR ENROLLMENT INTENSITY
	foreach t in F H Q L {
		gen ENRL_`t' = v38 == "`t'"
	}

* NUMERIC ENROLLMENT INTENSITY
* -999 IS A TEMPORARY PLACEHOLDER SO NOT-ENROLLED ROWS SORT LAST BELOW
	gen    enrl_n = 1         if v38 == "F"
	recode enrl_n (. = 0.5)   if v38 == "H"
	recode enrl_n (. = 0.25)  if v38 == "Q"
	recode enrl_n (. = 0.125) if v38 == "L"
	recode enrl_n (. = -999)

* FOR NOW, ONLY KEEP SCHOOL ID WITH HIGHEST ENROLLMENT INTENSITY IN MONTH
	gsort v5 v42 -enrl_n

	recode enrl_n (-999 = .)

* INTENSITY IS SUMMED ACROSS OVERLAPPING SPELLS; IT IS TOPCODED AFTER THE APPEND
	collapse (first) v43 (max) ENRL_* (sum) enrl_n, by(v5 v42)

	drop if v42 == -13

	tempfile enrl`i'
	save `enrl`i'', replace
	
* CLOSE LOOP OVER APP YEARS
}

* STACK THE FOUR ANNUAL BORROWER-MONTH ENROLLMENT FILES
use `enrl2015', clear
forv i = 2016/2018 {
	append using `enrl`i''
}

* FOR NOW, DROP OBS THAT ARE OUTSIDE OF PANEL
drop if v42>46

* TOPCODE NUMERIC ENROLLMENT INTENSITY
* FIX: MISSING COMPARES AS LARGER THAN ANY NUMBER, SO WITHOUT THE GUARDS A
* MISSING INTENSITY WOULD BE TOPCODED TO 1 AND FLAGGED AS ENROLLED. THE GUARDS
* MATCH THE PARALLEL CODE FURTHER DOWN THE FILE
replace enrl_n = 1 if enrl_n>1 & enrl_n~=.

gen any_enrl = enrl_n >0 if enrl_n ~=.

* ROWS ADDED BY TSFILL BEFORE A SPELL STARTS HAVE NO BORROWER ID
drop if v5 == .
compress

* FIX: A SPACE WAS MISSING BETWEEN THE COMMAND NAME AND THE FILE NAME
save "${working}\enrl", replace


* GRADUATION PANEL: STACK THE ANNUAL GRADUATION FILES WRITTEN BY THE LOOP ABOVE
use grad2015, clear

forv i = 2016/2018 {
	append using grad`i'
}

* FOR NOW, DROP OBS THAT ARE OUTSIDE OF PANEL
* (THIS ALSO DROPS ROWS WITH MISSING v42, SINCE MISSING COMPARES AS LARGER)
drop if v42>46

* SEVERAL GRADUATION RECORDS IN ONE MONTH BECOME v431, v432, ... ON ONE ROW
bysort v5 v42: gen num = _n
reshape wide v43, i(v5 v42) j(num)

* BALANCE THE PANEL: EVERY BORROWER GETS A ROW FOR EVERY MONTH IN THE DATA
tsset v5 v42

tsfill, full

rename grad grad_in_mo
	
* anygrad = GRADUATED IN THIS MONTH OR AN EARLIER MONTH (CARRIED FORWARD)
* NOTE(review): THE NEXT REPLACE SETS anygrad TO 0 AT MONTH 0 EVEN WHEN A
* GRADUATION IS RECORDED IN MONTH 0 - CONFIRM THIS IS INTENDED
gen anygrad = grad_in_mo
replace anygrad = 0 if v42 ==0	
bysort v5 (v42) : replace anygrad = 1 if anygrad== . & anygrad[_n-1] == 1 & v5 == v5[_n-1] & v42 == v42[_n-1]+1  
recode anygrad (. = 0)
recode grad_in_mo (. =0)	

compress

save "${working}\grad", replace

* REMOVE THE ANNUAL GRADUATION FILES FROM THE CURRENT DIRECTORY
forv i = 2015/2018 {
	erase grad`i'.dta
}

**#*******************************************************************************
*** COLLAPSED DATA SET FOR REGRESSIONS 
	
/*********************************************************************
* 1) PAYMENTS, DELINQ, DEF, FORBS
*********************************************************************/

** CREATE BI-ANNUAL FILES 
* PERIOD c COVERS MONTHS 6c-11 THROUGH 6c-6 (PERIOD 0 = -11 TO -6, PERIOD 8 = 37 TO 42)
* EACH PERIOD FILE payments_c STACKS ITS SIX MONTHLY FILES
forvalues c = 0/8 {

	local first = 6*`c' - 11
	local last  = `first' + 5

	forvalues mo = `first'/`last' {

* SERVICER ID AND AUTO DEBIT IND
		unzipfile     "idrrd_evar1_v42`mo'.csv.zip" , replace
		insheet using "idrrd_evar1_v42`mo'.csv", names clear
		erase         "idrrd_evar1_v42`mo'.csv"
		keep v5 v44
		tempfile temp
		save `temp', replace

* MONTHLY PAYMENT FILE, MATCHED BORROWERS ONLY
		unzipfile     "idrrd_v42_`mo'.csv.zip" , replace
		insheet using "idrrd_v42_`mo'.csv", names clear
		erase         "idrrd_v42_`mo'.csv"

		merge 1:1 v5 using `temp', keep(match) nogenerate

* ADJUST SCHEDULED PAYMENT AMOUNT FOR INFLATION
* NOTE: ROWS WITHOUT A CPI MATCH ARE DROPPED HERE (MATCHED ROWS ONLY)
		gen month = real(substr(v45,6,2))
		gen year  = real(substr(v45,1,4))

		merge m:1 year month using "${working}\cpi", keep(match) nogenerate

* JAN 2022 $
		local num = 281.148
		replace cpi = `num'/cpi

		foreach amt of varlist v46 v4 v47 v48 {
			replace `amt' = round(`amt'*cpi)
		}

		gen auto_debit = v44==1

* NOTE(review): THE COLLAPSE STEP AFTER THIS LOOP READS v51-v56 AND v58-v61,
* WHICH MUST BE COVERED BY THE stat_ WILDCARD HERE TO SURVIVE - CONFIRM NAMES
		keep v5 v42 v46 v460 stat_* v50 v4  v47 v48 auto_debit

* EVERY MONTH AFTER THE FIRST IS STACKED ON THE FILE BUILT SO FAR
		if `mo' > `first' {
			append using "${working}\payments_`c'"
		}
		save "${working}\payments_`c'", replace

* CLOSE LOOP OVER v42
	}

* CLOSE LOOP OVER 6-MONTH INTERVALS
}

** COMBINE BI-ANNUAL FILES INTO ONE COLLAPSED DATA SET
* EACH PERIOD FILE IS COLLAPSED TO ONE ROW PER BORROWER, THEN THE NINE ARE STACKED
*
* FIXES RELATIVE TO THE PREVIOUS VERSION
*  - any_forb_ad_ WAS BUILT FROM v54, THE SAME SOURCE AS any_forb_dc_. THE
*    MATCHING MEAN pct_forbad_ USES v56, SO any_forb_ad_ NOW USES v56 TOO
*  - v53 WAS LISTED TWICE IN THE ZERO-PAYMENT CONDITION; THE REPEAT IS REMOVED
*    NOTE(review): THE REPEAT MAY HAVE BEEN MEANT AS ANOTHER STATUS - CONFIRM
* LOOP OVER BI-ANNUAL PAYMENTS FILES
forv c = 0/8 {
	
	use "${working}\payments_`c'", clear

* RECODE MISSING SCHEDULED AMOUNT IF IN STATUSES WE KNOW HAVE $0 PAYMENT	
	recode v46 (. = 0) if v51 == 1 | v52 == 1 | v53 == 1 ///
		| v54 ==1  | v55 ==1 | v56 == 1 ///
		| v4 ==0 | v58  == 1 | v59  == 1
	replace v460 = 1 if v46 ==0
	
* SET DEFAULTED BORROWERS SCHEDULED AMOUNT TO MISSING 
* (ENTIRE BALANCE BECOMES DUE WHEN DEFAULT LEAVING SOME CRAZY VALUES)
	replace v46 = . if v61 ==1
	replace v460 = . if v61 == 1	

* COLLAPSE 
	gen period = `c'

* SUMMED BELOW TO COUNT THE MONTHS OBSERVED IN THE PERIOD
	gen num_months = 1

* ANY OF THE THREE FORBEARANCE STATUSES IN THE MONTH
	egen stat_forb_any = rowmax(v54 v55 v56)

	collapse (mean) av_amt_ = v46 pct_sched0_ = v460 v4_ = v4  ///
			pct_deflt_ = v61 pct_delinq_ = v50 pct_forbdc_ = v54 ///
			pct_forbma_ = v55 pct_forbad_ = v56 pct_forbany_ = stat_forb_any ///
			pct_grace_ = v51 pct_repay_ = v60 pct_defrl_ = v52 ///
		 (sum) v62 = v46 months_sched0_ = v460 v63 = num_months ///
		 (max) any_sched0_ = v460 any_deflt_ = v61 any_delinq_ = v50  ///
		    any_forb_dc_ = v54 any_forb_ma_ = v55 any_forb_ad_ = v56 ///
			any_defrl = v52 auto_debit, by(v5 period)

	tempfile temp`c'
	save `temp`c'', replace
	
* CLOSE LOOP OVER PAYMENT FILES
}

* STACK THE NINE PERIOD FILES
use `temp0', clear
forv i = 1/8 {
	append using `temp`i''
}
						

* CONVERT TO WIDE - ONE OB PER BORROWER
* EVERY OUTCOME GETS THE PERIOD NUMBER (0-8) AS ITS SUFFIX
reshape wide av_amt_ pct* v62  months_* any_* v63 v4_ auto_debit, i(v5) j(period)

* INFLATION ADJ RUNNING VAR (MATCHED BORROWERS ONLY)
merge m:1 v5 using "${working}\treatment", keep(match) nogenerate

* PUT RUNNING VAR IN $1K TERMS
foreach rv of varlist v3 dv3 {
	replace `rv' = `rv'/1000
}

* BASELINE CHARS (ALL BORROWERS KEPT; VARIABLES ALREADY IN MEMORY ARE NOT OVERWRITTEN)
merge m:1 v5 using "${working}\baseline", keep(master match) nogenerate

compress

* PERIOD KEY (MONTHS RELATIVE TO FIRST APPLICATION):
*   0 = -11 to -6    1 = -5 to 0     2 = 1 to 6
*   3 = 7 to 12      4 = 13 to 18    5 = 19 to 24
*   6 = 25 to 30     7 = 31 to 36    8 = 37 to 42

save "${working}\collapse", replace

/*********************************************************************
* 2) IDR PARTICIPATION + APPLICATION DATA
*********************************************************************/

** CONVERT APPLICATIONS DATA INTO MONTHLY DATA SETS
* RESULT: TEMPFILE app WITH ONE ROW PER BORROWER-MONTH FOR BORROWERS IN THE
* TREATMENT FILE
forvalues yr = 2015/2018 {

	unzipfile     "idrrd_apps_fa`yr'.csv.zip" , replace
	insheet using "idrrd_apps_fa`yr'.csv", names clear
	erase         "idrrd_apps_fa`yr'.csv"

* NUMBER OF MONTHS BETWEEN APPLICATION AND INITIAL APPLICATION
	gen v42 = ///
		ym(real(substr(v2 ,1,4)), real(substr(v2 ,6,2))) ///
		- ym(real(substr(first_v2 ,1,4)), real(substr(first_v2  ,6,2)))

* FOR NOW LIMIT TO MONTHS ELAPSED THAT WE HAVE OTHER OUTCOMES FOR
	keep if v42 <47

* OUTCOME MEASURES
	keep v5 v7 v6 v10 v42

* app MARKS AN APPLICATION ROW; v11 MARKS THE LISTED APPLICATION TYPES
	gen app = 1
	gen v11 = 0
	foreach code in A1 A2 A3 A5 A4 A5 A6 A7 D1 NEW RECALCULATE RECERTIFY CHANGEPLAN {
		replace v11 = 1 if v7 =="`code'"
	}

* WIDE-THEN-LONG GIVES EVERY BORROWER A ROW AT EVERY v42 VALUE IN THE FILE
	reshape wide v11 v6 v10 app v7, i(v5) j(v42)
	reshape long v11 v6 v10 app v7, i(v5) j(v42)

* MONTHS ADDED BY THE RESHAPE HAVE NO APPLICATION
	replace app = 0 if app == .
	replace v11 = 0 if v11 == .
	gen v11_app = v11*app

* FLAG THE MONTH OF A v11 APPLICATION AND THE 12 MONTHS THAT FOLLOW IT
	gen v11_idr= 0
	forvalues lag = 0/12 {
		bysort v5 (v42): replace v11_idr =1  if v11_app[_n-`lag'] ==1 & v5 == v5[_n-`lag'] & v42 == v42[_n-`lag']+`lag'
	}

* RESTRICT TO BORROWERS PRESENT IN THE TREATMENT FILE
	merge m:1 v5 using "${working}\treatment", keep(match) nogenerate

	keep v5 v42 v7 v6 app v11_app v11_idr

	tempfile app`yr'
	save `app`yr'', replace

* CLOSE LOOP OVER APP YEARS
}

* APPEND YEARS (2018 IS STILL IN MEMORY)
forvalues yr = 2015/2017 {
	append using `app`yr''
}

tempfile app
save `app', replace

** CREATE BI-ANNUAL FILES 
* PERIODS 2-8 ONLY (MONTHS 1 TO 42). EACH MONTHLY PLAN FILE IS MERGED WITH THE
* APPLICATION PANEL AND STACKED INTO ${working}\apps_c
*
* FIX: THE COLLAPSE AFTER THIS LOOP READS app, BUT app WAS NOT IN THE KEEP
* LIST BELOW AND SO WAS DROPPED. IT IS NOW KEPT
forv c = 2(1)8 {

loc start = -11
loc end = -6

loc start = `start'+(`c'*6)
loc end = `end'+(`c'*6)

* LOOP OVER v42 WITHIN BI-ANNUAL PERIOD
forv i = `start'/`end' {
	
	unzipfile     "idrrd_moel_`i'.csv.zip" , replace
	insheet using "idrrd_moel_`i'.csv", names clear 
	erase         "idrrd_moel_`i'.csv"
	
* KEEP MATCHED BORROWER-MONTHS (_merge 3, 4 OR 5 UNDER THE UPDATE OPTION)
	rename v72 v42
	merge 1:1 v5 v42 using `app', update 
	keep if _m>=3
	drop _m
		
* PLAN INDICATORS: v66 COMBINES v64 AND v65; plan_idr = ANY OF THE LISTED PLANS
	egen v66 = rowmax(v64 v65)
	egen plan_idr = rowmax(v66 v67 v68 v69  v70 v71 )
* ON A LISTED PLAN AND WITHIN 12 MONTHS OF A v11 APPLICATION
	gen plan_idr_v11 = plan_idr*v11_idr
	
* COMBINED IBR AND PAYE 
	egen plan_ibrpaye_pfh = rowmax(v70 v68)
	egen plan_ibrpaye_nopfh = rowmax(v71 v69)
	
* NOTE(review): v12 AND inc ARE NOT CREATED IN THIS FILE - PRESUMABLY THEY
* COME FROM THE MONTHLY EXTRACT; CONFIRM THEY EXIST OR REMOVE THEM
	keep v5 v42 v66 v67 plan_ibrpaye_* plan_idr plan_idr_v11 v12 app v11_app inc v7
		
if `i' >`start' {
	append using "${working}\apps_`c'"
}
	save "${working}\apps_`c'", replace

* CLOSE LOOP OVER v42	
}

* CLOSE LOOP OVER 6-MONTH INTERVALS
}


** COMBINE BI-ANNUAL FILES INTO ONE COLLAPSED DATA SET
* LOOP OVER BI-ANNUAL APPLICATION/PLAN FILES
forv c = 2/8 {
	
	use "${working}\apps_`c'", clear
	
* COLLAPSE TO 6-MONTH PERIOD LEVEL (ONE ROW PER BORROWER AND PERIOD)
	gen period = `c'

* SUMMED BELOW TO COUNT THE MONTHS OBSERVED IN THE PERIOD
	gen v63plan = 1

	collapse (mean) pct_idr_ = plan_idr_v11 pct_icr_ = v66 pct_repaye_ = v67 ///
		pct_ibrpaye_pfh_ = plan_ibrpaye_pfh pct_ibrpaye_nopfh_ = plan_ibrpaye_nopfh ///
		(max) any_idr_ = plan_idr_v11 any_icr = v66 any_repaye = v67 ///
		any_ibrpaye_pfh_ = plan_ibrpaye_pfh any_ibrpaye_nopfh_ = plan_ibrpaye_nopfh ///
		any_app_ = app any_v11_app_ = v11_app (sum) v63plan, by(v5 period)

		
	tempfile temp`c'
	save `temp`c'', replace
	
* CLOSE LOOP OVER PERIOD FILES
}

* STACK PERIODS 2-8 AND PUT ONE BORROWER PER ROW
use `temp2', clear
forv i = 3/8 {
	append using `temp`i''
}
	
reshape wide pct_* any_*  v63plan, i(v5) j(period)

* ADD THE PAYMENT OUTCOMES, RUNNING VARIABLE AND BASELINE CHARS
* FIX: WITHOUT NOGEN THE _merge VARIABLE WAS SAVED INTO THE COLLAPSE FILE,
* WHERE IT BREAKS ANY LATER MERGE THAT USES THE FILE AS MASTER
merge 1:1 v5 using "${working}\collapse", update nogen


save "${working}\collapse", replace


/*********************************************************************
* 3) ENROLLMENT + GRADUATION DATA
*********************************************************************/
** MONTH AND YEAR OF FIRST APPLICATION 
* UNLIKE THE STANDALONE ENROLLMENT FILE BUILT EARLIER, NO BANDWIDTH FILTER IS
* APPLIED HERE: EVERY BORROWER IN THE MONTH-0 FILE IS KEPT
unzipfile     "idrrd_moel_0.csv.zip" , replace
insheet using "idrrd_moel_0.csv", names clear 
erase         "idrrd_moel_0.csv"

keep v5 v2 

tempfile samp		
save `samp', replace

** LOOP OVER ANNUAL FILES TO CREATE MONTHLY "LONG" FILE
* PER YEAR: GRADUATION RECORDS GO TO TEMPFILE grad<year>, ENROLLMENT SPELLS ARE
* EXPANDED TO MONTHS AND COLLAPSED TO BORROWER-MONTH IN TEMPFILE enrl<year>

forv i = 2015/2018 {
	
	unzipfile     "idrrd_nsldsenrl_fa`i'.csv.zip" , replace
	insheet using "idrrd_nsldsenrl_fa`i'.csv", names clear 
	erase         "idrrd_nsldsenrl_fa`i'.csv"
	
* KEEP MATCHED BORROWERS AND BRING IN THE FIRST APPLICATION DATE v2
	merge m:1 v5 using `samp', nogen keep(3)
	
* ONLY KEEP RECORDS THAT CORRESPOND TO ENROLLMENT SPELLS
	foreach t in A D M W X Z {
		drop if v38 == "`t'"
	}

	gen grad = v38 == "G"
	
* ENROLLMENT SPELL 	
* NUMBERED BY START DATE v39, SEPARATELY FOR GRADUATION AND ENROLLMENT RECORDS
	bysort v5 grad (v39): gen spell = _n	
	
* REPLACE "9999" END DATES WITH MAY 31, 2023
	replace v40 = "2023-05-31" if v40 == "9999-12-31"	 & v38 ~="G"
	
* SPELL START AND END IN MONTHS RELATIVE TO THE FIRST APPLICATION
	gen v42_st = ym(real(substr(v39 ,1,4)), real(substr(v39 ,6,2))) - ///
		ym(real(substr(v2,1,4)), real(substr(v2,6,2)))

	gen v42_end = ym(real(substr(v40,1,4)), real(substr(v40,6,2))) - ///
		ym(real(substr(v2,1,4)), real(substr(v2,6,2)))

	replace v42_end = . if v38 == "G"

* PUT GRADUATION RECORDS IN A DIFFERENT FILE
preserve

	keep if grad == 1
	rename v42_st v42
	keep v5 v42 grad v43
	
* FOR NOW, DROP GRADUATION OBS FROM PRE-APPLICATION PERIOD (MAY WANT TO COME BACK TO ADD TO DEMS LATER)	
	drop if v42<0 
	
	tempfile grad`i'
	save `grad`i'', replace

restore

	drop if grad == 1
	drop grad v1 

* FOR NOW, DROP ENROLLMENT SPELLS THAT ENDED MORE THAN 12 MONTHS BEFORE FIRST APPLICATION
	drop if v42_end<-13

* EARLIER STARTS ARE CENSORED AT MONTH -13, WHICH IS DROPPED AFTER THE COLLAPSE
	replace v42_st = -13 if v42_st<-12

* REPLACE SPELLS THAT HAVE END DATE OF  "9999-12-31" & START DATE BEFORE THE FIRST APPLICATION
* NOTE(review): v42_st CANNOT BE BELOW -13 AFTER THE CENSORING ABOVE, SO THIS
* DROP NEVER REMOVES ANYTHING AS WRITTEN - CONFIRM THE INTENDED CONDITION
	drop if v40 == "2023-05-31" & v42_st < -13

	drop v39 v40 v2

* ONE PANEL UNIT PER BORROWER-SPELL; TSFILL ADDS A ROW FOR EVERY MONTH
	egen group = group(v5 spell)
	rename v42_st v42

	tsset group v42

	tsfill, full

* CARRY SPELL ATTRIBUTES FORWARD FROM THE START MONTH INTO THE FILLED MONTHS
	foreach v of var v5 v43 spell v42_end {
		bysort group (v42): replace `v' = `v'[_n-1] if `v' == . & `v'[_n-1] ~=. & group == group[_n-1] & v42 == v42[_n-1] + 1
	}

	bysort group (v42): replace v38 = v38[_n-1] if v38== "" & v38[_n-1] ~="" & group == group[_n-1] & v42 == v42[_n-1] + 1
* BLANK OUT MONTHS AFTER THE SPELL HAS ENDED
	replace v38 = "" if v42>v42_end
	replace v43 = . if v42>v42_end

* INDICATORS FOR ENROLLMENT INTENSITY
	foreach t in F H Q L {
		gen ENRL_`t' = v38 == "`t'"
	}

* NUMERIC ENROLLMENT INTENSITY
* -999 IS A TEMPORARY PLACEHOLDER SO NOT-ENROLLED ROWS SORT LAST BELOW
	gen    enrl_n = 1         if v38 == "F"
	recode enrl_n (. = 0.5)   if v38 == "H"
	recode enrl_n (. = 0.25)  if v38 == "Q"
	recode enrl_n (. = 0.125) if v38 == "L"
	recode enrl_n (. = -999)

* FOR NOW, ONLY KEEP SCHOOL ID WITH HIGHEST ENROLLMENT INTENSITY IN MONTH
	gsort v5 v42 -enrl_n

	recode enrl_n (-999 = .)

* INTENSITY IS SUMMED ACROSS OVERLAPPING SPELLS; IT IS TOPCODED AFTER THE APPEND
	collapse (first) v43 (max) ENRL_* (sum) enrl_n, by(v5 v42)

	drop if v42 == -13

	tempfile enrl`i'
	save `enrl`i'', replace
	
* CLOSE LOOP OVER APP YEARS
}

* STACK THE FOUR ANNUAL BORROWER-MONTH ENROLLMENT FILES
use `enrl2015', clear
forvalues yr = 2016/2018 {
	append using `enrl`yr''
}

* FOR NOW, DROP OBS THAT ARE OUTSIDE OF PANEL
drop if v42>46

* TOPCODE NUMERIC ENROLLMENT INTENSITY (MISSING IS LEFT MISSING)
replace enrl_n = 1 if enrl_n != . & enrl_n > 1

* ENROLLED AT ANY INTENSITY IN THE MONTH (MISSING WHEN INTENSITY IS MISSING)
gen any_enrl = enrl_n >0 if enrl_n != .

* ROWS WITHOUT A BORROWER ID ARE REMOVED
keep if v5 != .
compress

tempfile enrl
save `enrl', replace

* CHECK THAT THE SAVED FILE HAS EXACTLY ONE ROW PER BORROWER-MONTH
bysort v5 v42: assert _N == 1

* GRADUATION PANEL: STACK THE FOUR ANNUAL GRADUATION TEMPFILES
use `grad2015', clear

forv i = 2016/2018 {
	append using `grad`i''
}
* FOR NOW, DROP OBS THAT ARE OUTSIDE OF PANEL
* (THIS ALSO DROPS ROWS WITH MISSING v42, SINCE MISSING COMPARES AS LARGER)
drop if v42>46

* SEVERAL GRADUATION RECORDS IN ONE MONTH BECOME v431, v432, ... ON ONE ROW
bysort v5 v42: gen num = _n
reshape wide v43, i(v5 v42) j(num)

* BALANCE THE PANEL: EVERY BORROWER GETS A ROW FOR EVERY MONTH IN THE DATA
tsset v5 v42

tsfill, full

rename grad grad_in_mo
	
* anygrad = GRADUATED IN THIS MONTH OR AN EARLIER MONTH (CARRIED FORWARD)
* NOTE(review): THE NEXT REPLACE SETS anygrad TO 0 AT MONTH 0 EVEN WHEN A
* GRADUATION IS RECORDED IN MONTH 0 - CONFIRM THIS IS INTENDED
gen anygrad = grad_in_mo
replace anygrad = 0 if v42 ==0	
bysort v5 (v42) : replace anygrad = 1 if anygrad== . & anygrad[_n-1] == 1 & v5 == v5[_n-1] & v42 == v42[_n-1]+1  
recode anygrad (. = 0)
recode grad_in_mo (. =0)	

compress

* ADD MONTHLY ENROLLMENT; UNMATCHED ROWS FROM EITHER SIDE ARE KEPT
* _merge IS LEFT IN MEMORY HERE AND IS DISCARDED BY THE COLLAPSE THAT FOLLOWS
merge 1:1 v5 v42 using `enrl', update 

* COLLAPSE TO 6-MONTH INTERVALS
* PERIOD 0 = MONTHS -11 TO -6, PERIOD 1 = -5 TO 0, ..., PERIOD 8 = 37 TO 42
* MONTHS OUTSIDE -11 TO 42 GET NO PERIOD AND ARE DROPPED AFTER THE COLLAPSE
gen period = floor((v42 + 11)/6) if v42 >= -11 & v42 < 43

* SUMMED BELOW TO COUNT THE MONTHS OBSERVED IN THE PERIOD
gen v63enrl = 1

collapse ///
	(max)  grad_in_period_ = grad_in_mo ///
	       any_grad_td_    = anygrad ///
	       any_enrl_f_     = ENRL_F ///
	       any_enrl_h_     = ENRL_H ///
	       any_enrl_q_     = ENRL_Q ///
	       any_enrl_l_     = ENRL_L ///
	       max_enrl_int_   = enrl_n ///
	       enrl_in_period_ = any_enrl ///
	(mean) pct_enrl_       = any_enrl ///
	(sum)  v63enrl ///
	       months_enrl_f_  = ENRL_F ///
	       months_enrl_h_  = ENRL_H ///
	       months_enrl_q_  = ENRL_Q ///
	       months_enrl_l_  = ENRL_L ///
	       months_enrl_    = any_enrl ///
	       months_enrl_n_  = enrl_n ///
	, by(v5 period)

drop if period ==.

* NO GRADUATION RECORD IN THE PERIOD COUNTS AS NOT GRADUATED
replace grad_in_period_ = 0 if grad_in_period_ == .
replace any_grad_td_    = 0 if any_grad_td_ == .

* ONE ROW PER BORROWER, PERIOD NUMBER AS SUFFIX
reshape wide pct_enrl_ grad_in_period_ any_* months_* enrl_in_period_ max_enrl_int_ v63enrl, i(v5) j(period)

* ADD EVERYTHING ALREADY IN THE COLLAPSED FILE
merge 1:1 v5 using "${working}\collapse", update nogenerate

* FOR PERIODS 7 AND 8, TREAT MISSING ENROLLMENT AND GRADUATION OUTCOMES AS ZERO
* WHEN THE BORROWER HAS IDR DATA FOR THAT PERIOD (pct_idr_ NOT MISSING)
* PERIODS 0-6 ARE LEFT AS THEY ARE (THE ZERO-FILL FOR THEM WAS COMMENTED OUT)
local stubs pct_enrl_ grad_in_period_ any_grad_td_ any_enrl_f_ any_enrl_h_ ///
	any_enrl_q_ any_enrl_l_ max_enrl_int_ enrl_in_period_ v63enrl ///
	months_enrl_f_ months_enrl_h_ months_enrl_q_ months_enrl_l_ ///
	months_enrl_ months_enrl_n_

forvalues p = 7/8 {
	foreach stub of local stubs {
		replace `stub'`p' = 0 if `stub'`p' == . & pct_idr_`p' != .
	}
}

compress

save "${working}\collapse", replace

